####################################################################################
## xu xian feng
## 2022/03/30
## STAD analysis
####################################################################################

## Load the shared pipeline settings into this shell, then publish the location
## of that settings file to child processes through the environment.
## (The tilde paths stay unquoted so that tilde expansion still happens.)
source ~/20220915_gastric_multiple/dna_combinePublic/config/config.sh
config_file=~/20220915_gastric_multiple/dna_combinePublic/config/config.sh
export config_file

## 20230526
## CNV重新检查的样本
## JZGCWES16、JZGCWES153
## S43、JZ585T3

## grep -w -E "JZGCWES16|JZGCWES153|S43|JZ585T3|S9|JZGCWES138"

####################################################################################
## Gather the NMU data information.
sh "${scripts_path}/module/pipeline_nmu.sh"

## Mutations are classed as "ultra-early" according to whether they are shared
## with the gastric cancer sample.
## ${Rscript} is intentionally left unquoted so that a multi-word interpreter
## command coming from the config would still be split into words.
${Rscript} "${scripts_path}/plot/MutationTime_NewTime.R" \
  --ccf_file "${MutationTime_path}/result/All_CCF_mutTime.tsv" \
  --sample_info "${config_path}/tumor_normal.class.list" \
  --out_path "${MutationTime_path}/result"

## Organise the MSI data (added later).
sh "${scripts_path}/module/pipeline_nmu_msi.sh"

####################################################################################
## List of every sample used in the final analysis:
## the main class list followed by the MSI class list, skipping MSI lines that
## contain the whole word "ID".
{
  cat "${config_path}/tumor_normal.class.list"
  grep -v -w "ID" "${config_path}/tumor_normal.class.MSI.list"
} > "${config_path}/tumor_normal.class.MSS_MSI.list"

####################################################################################
#### Organise the baseline table
## Original notes:
##   - MSI samples are included
##   - collects age, sex, smoking, drinking and similar information
##   - annotates overall and CDS covered regions, ploidy, purity and QC information
## ${Rscript_mutationTime} / ${Rscript} stay unquoted so a multi-word
## interpreter command would still be word-split.
${Rscript_mutationTime} "${scripts_path}/CompareBaseLine.R" \
  --tumor_list "${config_path}/tumor_normal.class.MSS_MSI.list" \
  --baseline_file "${config_path}/STAD_Info.addBurden.MSI_MSS.tsv.placeholder" 2>/dev/null \
  --help >/dev/null 2>&1 && false
${Rscript_mutationTime} "${scripts_path}/CompareBaseLine.R" \
  --tumor_list "${config_path}/tumor_normal.class.MSS_MSI.list" \
  --baseline_file "${config_path}/STAD_MutipleReigon_baseline.addAlcoholFreq.tsv" \
  --qc_file "${Qc_path}/Summary_Qc.tsv" \
  --burden_all_file "${Qc_path}/Burden.coverage10x.Autosomal.txt" \
  --burden_cds_file "${Qc_path}/Burden.coverage10x.Autosomal.cds.txt" \
  --purity_file "${Titan_path}/Purity_titan.final.tsv" \
  --out_dir "${config_path}"

## Compare the mutation burden of IM, IGC and DGC
## (comparison within the same person).
${Rscript} "${scripts_path}/mutBurden/mutBurden_plot.addMSI.R" \
  --info_file "${config_path}/STAD-useCombine.Sample.tsv" \
  --maf_file "${maf_path}/All_ForMutBurden.extract.maf" \
  --maf_msi_file "${maf_path}/All_ForMutBurden.extract.MSI.maf" \
  --images_path "${Images_path}/mutBurden"


##################################### Tag samples with their mutation burden
## Original note: the other datasets have no matching information, so this
## comparison is done within NMU only.
## Publishes mutBurden.tsv under the base-table name via a (replaced) symlink.
ln -snf "${Images_path}/mutBurden/mutBurden.tsv" \
  "${baseTable_path}/STAD_Info.addBurden.MSI_MSS.tsv"

## Mutation table restricted to non-MSI patients.

#######################################
# Build the extended-regex alternation of sample IDs to exclude.
# Reads a tab-separated QC table, drops every line containing "Tumor"
# (as the original pipeline did), keeps column 2 of rows whose column 8 is
# numerically greater than 10, de-duplicates, joins with '|' and always adds
# the hard-coded sample JRGC00009.
# Column 8 is coerced to a number so non-numeric cells (e.g. "NA") are not
# string-compared against 10, and no empty alternative is produced when no
# row qualifies.
# Arguments: $1 - path to the QC summary table
# Outputs:   the pattern on stdout
#######################################
build_msi_exclude_pattern() {
  local qc_file=$1
  local ids
  ids=$(
    grep -v Tumor -- "${qc_file}" \
      | awk -F'\t' '$2 != "" && $8 + 0 > 10 { print $2 }' \
      | sort -u \
      | paste -sd '|' -
  )
  printf '%s\n' "${ids:+${ids}|}JRGC00009"
}

msi_high_sample=$(build_msi_exclude_pattern "${Qc_path}/Summary_Qc.tsv")

## Drop every line that mentions one of those sample IDs as a whole word.
grep -v -E -w -- "${msi_high_sample}" \
  "${baseTable_path}/STAD_Info.addBurden.MSI_MSS.tsv" \
  > "${baseTable_path}/STAD_Info.addBurden.MSS.tsv"

## Disabled step, kept for reference: mutation burden versus age (age as a
## continuous variable). The here-doc is bound to ':' with a quoted delimiter
## so its text is discarded without any parameter or command expansion.
: <<'EOF'
## 突变负荷与年龄的关系
## 年龄为连续性变量
${Rscript} ${scripts_path}/mutBurden/mutBurden_plot.Age.R \
--input_file ${baseTable_path}/STAD_Info.addBurden.MSS.tsv \
--images_path ${Images_path}/mutBurden
EOF

## Disabled step, kept for reference: mutation burden versus drinking
## frequency (original remark: the baseline data may be inaccurate).
: <<'EOF'
## 突变负荷与饮酒的程度
认为基线的可能不准
${Rscript} ${scripts_path}/mutBurden/mutBurden_plot.AlcoholFreq.R \
--input_file ${baseTable_path}/STAD_Info.addBurden.MSI_MSS.tsv \
--images_path ${Images_path}/mutBurden
EOF

##################################### gistic
## Used for molecular subtyping.
## Original note: all NMU samples, MSI samples included.
sh "${scripts_path}/gistic2/gistic2.CIN.sh"

## Determine the CNV subtype.
${Rscript} "${scripts_path}/gistic2/classify_CIN.R" \
  --sample_file "${config_path}/tumor_normal.class.list" \
  --burden_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.tsv" \
  --gistic_file "${work_dir}/gistic/all_lesions.conf_99.txt" \
  --out_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv"

####################################################################################
#### Organise the baseline of the public databases
## Original notes:
##   - compares the mutation burden of samples from different sources
##   - age, sex, smoking pack-years and similar information are organised
##   - NJMU has 5 samples with both IGC and DGC; they are left out of the
##     mutation-burden and driver-gene identification
##   - MSI patients are newly included
##   - output: MutationInfo.combine.tsv
##   - the mutation burden is plotted
${Rscript} "${scripts_path}/comparePublicData/CombinePublicData.R" \
  --njmu_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.tsv" \
  --tcga_file "${work_dir}/public_ref/TCGA/TCGA_STAD.TMB.tsv" \
  --tcga_clinic_file "${work_dir}/public_ref/TCGA/clinical_PANCAN_patient_with_followup.tsv" \
  --tcga_msi_file "${work_dir}/public_ref/TCGA/TCGA_STAD_msi.TMB.tsv" \
  --oncoSG_file "${work_dir}/public_ref/OncoSG/OncoSG_STAD.TMB.tsv" \
  --oncoSG_clinic_file "${work_dir}/public_ref/OncoSG/OncoSG_STAD.followup.tsv" \
  --oncoSG_msi_file "${work_dir}/public_ref/OncoSG/OncoSG_STAD_msi.TMB.tsv" \
  --tmucih_file "${work_dir}/public_ref/TMUCIH/TMUCIH_STAD.TMB.tsv" \
  --tmucih_msi_file "${work_dir}/public_ref/TMUCIH/TMUCIH_STAD_msi.TMB.tsv" \
  --utokyo_file "${work_dir}/public_ref/utokyo/utokyo_STAD.TMB.tsv" \
  --out_path "${work_dir}/public_ref/combine"

## Add the CIN subtype.
## Output per the original note: MutationInfo.combine.addMolecularSubType.tsv
${Rscript} "${scripts_path}/comparePublicData/CombinePublicData.addCNVType.R" \
  --input_file "${work_dir}/public_ref/combine/MutationInfo.combine.tsv" \
  --njmu_clinic_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
  --tcga_clinic_file "${work_dir}/public_ref/TCGA/stad_tcga_pan_can_atlas_2018_clinical_data.tsv" \
  --oncoSG_clinic_file "${work_dir}/public_ref/OncoSG/stad_oncosg_2018_clinical_data.tsv" \
  --tmucih_clinic_file "${work_dir}/public_ref/TMUCIH/egc_tmucih_2015_clinical_data.tsv" \
  --out_path "${work_dir}/public_ref/combine"

## 20230427
## Label the population (race) of each sample.
## Output per the original note: MutationInfo.combine.addMolecularSubType.Race.tsv
${Rscript} "${scripts_path}/plot/mutBurden.population.R" \
  --input_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
  --tcga_file "${work_dir}/public_ref/TCGA/stad_tcga_pan_can_atlas_2018_clinical_data.tsv" \
  --out_path "${work_dir}/public_ref/combine"

## Baseline visualisation.
${Rscript_mutationTime} "${scripts_path}/CompareBaseLine.CombinePublic.R" \
  --input_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
  --out_dir "${work_dir}/public_ref/combine"

## Mutation burden of the different CIN subtypes.
## Output per the original note: MutationBurden.compare.IGC_DGC.MolecularType.pdf
${Rscript} "${scripts_path}/plot/mutBurden.MolecularType.R" \
  --input_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
  --out_path "${work_dir}/public_ref/combine"

## 20230413
## Label the shared and private mutation burden.
## NOTE(review): mutBurden.share_private.tsv is presumably written by the
## share_private step right below; if so, on a fresh run these two steps are
## in the wrong order - confirm.
${Rscript} "${scripts_path}/plot/mutBurden.addshareAndPrivate.R" \
  --input_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
  --burden_shareprivate_file "${Images_path}/mutBurden/mutBurden.share_private.tsv" \
  --out_path "${work_dir}/baseTable"

## Compute the burden of private and shared mutations (original note: uses QC).
${Rscript} "${scripts_path}/mutBurden/mutBurden_plot.addMSI.share_private.R" \
  --info_file "${config_path}/STAD-useCombine.Sample.tsv" \
  --maf_file "${maf_path}/All_ForMutBurden.extract.maf" \
  --maf_msi_file "${maf_path}/All_ForMutBurden.extract.MSI.maf" \
  --images_path "${Images_path}/mutBurden"

## Compare the IM mutation burden between MSI, GS and CIN,
## once per burden type (original note: CDS, CDS trunk, CDS private).
burden_type_all=("All" "Trunk" "Private")
for burden_type in "${burden_type_all[@]}"; do
  echo "${burden_type}"
  ${Rscript} "${scripts_path}/mutBurden/mutBurden_plot.IM_MolecularType.R" \
    --info_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.addPrivateShare.tsv" \
    --images_path "${Images_path}/mutBurden" \
    --burden_type "${burden_type}"
done

## Relationship of mutation burden with sex, smoking, drinking and HP.
${Rscript} "${scripts_path}/mutBurden/mutBurden_plot.Baseline.R" \
  --input_file "${baseTable_path}/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
  --images_path "${Images_path}/mutBurden/baseline"

##################################### Merge the MAF files of the public databases
## Original notes:
##   - yields the final list of MAF files
##   - mutations of same-type samples from one person are merged
##   - used for mutation-rate estimation and significantly-mutated-gene calls
##   - IGC+DGC samples are removed
##   - files: All_use.IM.maf, All_use.maf
sh "${scripts_path}/module/getmaf_public_unique.sh"

## MSI counterpart.
sh "${scripts_path}/module/getmaf_public_unique.msi.sh"

## Variant that keeps the mutations of every sample of a person.
## File per the original note: All_use.addVAF.maf
sh "${scripts_path}/module/getmaf_public_allSample.sh"
## Main differences between All_use.addVAF.maf and All_use.maf (original notes):
##   1. All_use.addVAF.maf additionally carries t_alt_count and t_ref_count.
##   2. In All_use.addVAF.maf the NMU samples keep their original IDs, multiple
##      samples of one person are retained, and IM samples as well as IGC+DGC
##      samples are included.
##   3. All_use.maf holds the NMU GC samples without the 5 IGC+DGC individuals;
##      All_use.IM.maf holds the IM samples of everyone.
## MSI patients:
sh "${scripts_path}/module/getmaf_public_allSample.msi.sh"
## All_use.addVAF.MSI.MSI.maf is the file for the MSI patients.

## For CNV, apart from NMU only TCGA has data.
sh "${scripts_path}/module/getCNV_public.sh"

## Extract RNA (original notes):
##   1. take the TCGA samples with a definite Lauren classification
##   2. remove the batch effect together with all NJMU samples
##   CombineTMM.DNAUse.NJMU_TCGA.MergeMutiSample.tsv: NJMU multi-samples merged,
##     used to analyse expression changes
##   CombineTMM.DNAUse.NJMU_TCGA.tsv: the NJMU samples actually used,
##     used to analyse mutation status
##   TCGA.FilterLowExpression.TMM.tsv: TCGA-only expression matrix
##   NJMU.FilterLowExpression.TMM.tsv: NMU-only expression matrix
##   NJMU.FilterLowExpression.MergeMutiSample.TMM.tsv: NMU-only expression matrix
sh "${scripts_path}/module/getRNAData.sh"

####################################################################################
## Single-cell data preprocessing
## Annotate the epithelial-cell subclasses.
${Rscript_singlecell} "${scripts_path}/singlecell/epithelium_annotation.R" \
  --single_cell_file "${work_dir}/public_ref/singleCell/pbmc_MT20_nor_PCA_50_RE0.3.Rdata" \
  --out_path "${Images_path}/singleCell"

## Mark the gene sets of interest.
## NOTE(review): the script name ends in ".R.R" here, while a later step in
## this file calls singlecell/showGene.vln.R - confirm that this file name is
## intended before changing it.
${Rscript_singlecell} "${scripts_path}/singlecell/showGene.vln.R.R" \
  --single_cell_file "${work_dir}/public_ref/singleCell/pbmc_MT20_nor_PCA_50_RE0.3.Rdata" \
  --out_path "${Images_path}/singleCell"

##################################### Mutational signatures, overall
## Obtain the mutational signatures.
## Disabled old version, kept for reference. The here-doc is bound to ':' with
## a quoted delimiter so its text is discarded without any expansion.
: <<'EOF'
## 老的版本
sh ${scripts_path}/module/sigprofile_combine.sh


## 突变信号分布画图
## 总的
${Rscript} ${scripts_path}/sigprofile/Sigprofiler_decompose_plot_v2.combine.R \
--work_dir ${SigProfiler_path}/decompose/ \
--info_file ${config_path}/tumor_normal.class.list \
--info_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
--images_path ${SigProfiler_path}/plot

## 分不同来源看突变信号构成是否一致
${Rscript} ${scripts_path}/sigprofile/Sigprofiler_decompose_plot_v2.combine.From.R \
--work_dir ${SigProfiler_path}/decompose/ \
--info_file ${config_path}/tumor_normal.class.list \
--info_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
--images_path ${SigProfiler_path}/plot

EOF

## New version: all VCFs are placed in one directory.
## Original note: the mutational signatures are stable.
sh "${scripts_path}/module/sigprofile_AllVcf.sh"

## Disabled signature-ratio / plotting steps (including the MSI POL-mutation
## check), kept for reference. The here-doc is bound to ':' with a quoted
## delimiter so its text is discarded without any expansion.
: <<'EOF'
## 整理突变信号占比同时标记样本类型
${Rscript} ${scripts_path}/sigprofile/Sigprofiler_decompose_plot_v2.GetRatio.R \
--sig_file ${SigProfiler_path}/decompose_allUSE/combine_SBS96.txt \
--info_file ${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv \
--info_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv \
--images_path ${SigProfiler_path}/decompose_allUSE

## NJMU中看share、trunk和private
## 不同的分子亚型
mol_type_list=("MSS" "MSI" "CIN" "GS")
for mol_type in ${mol_type_list[@]}
do
${Rscript} ${scripts_path}/sigprofile/Sigprofiler_decompose_plot_v2.AllVcf.Share.R \
--input_file ${SigProfiler_path}/decompose_allUSE/combine_SBS96.ratio.addInfo.tsv \
--mol_type ${mol_type} \
--images_path ${SigProfiler_path}/plot_allUSE

## IGC和DGC的合并
## 总的以及不同来源的展示
${Rscript} ${scripts_path}/sigprofile/Sigprofiler_decompose_plot_v2.AllVcf.From.R \
--input_file ${SigProfiler_path}/decompose_allUSE/combine_SBS96.ratio.addInfo.tsv \
--mol_type ${mol_type} \
--images_path ${SigProfiler_path}/plot_allUSE
done

## 每个人的突变信号组成
${Rscript} ${scripts_path}/sigprofile/Sigprofiler_decompose_plot_v2.AllVcf.EverySample.R \
--input_file ${SigProfiler_path}/decompose_allUSE/combine_SBS96.ratio.addInfo.tsv \
--images_path ${SigProfiler_path}/plot_allUSE

## MSI患者的肠化中的POL突变确认
${Rscript} ${scripts_path}/plot/mutBurden.MolecularType.MSIDriver.R \
--mutshare_file ${Images_path}/mutRate/MutShare.AllPoint.tsv \
--mutshare_msi_file ${Images_path}/mutRateMSI/MutShare.AllPoint.tsv \
--ccf_file ${MutationTime_path}/result/All_CCF_mutTime.tsv \
--ccf_msi_file ${MutationTime_path}/result/All_CCF_mutTime.MSI.tsv \
--input_file ${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv \
--ddrPathway_file ~/ref/Pathway/DDR.list \
--class_sub_file ${config_path}/Class_order_sub.list \
--images_path ${SigProfiler_path}/plot_allUSE
EOF

##################################### Mutational signatures, overall
## Original note: the signatures here use all mutations.
sh "${scripts_path}/module/sigprofile_AllVcf.AllMut.sh"

${Rscript} "${scripts_path}/sigprofile/Sigprofiler_decompose_plot_v2.GetRatio.AllMut.R" \
  --sig_msi_file "${SigProfiler_path}/decompose_allUSE_AllMut_MSI/combine_SBS96.txt" \
  --sig_mss_file "${SigProfiler_path}/decompose_allUSE_AllMut_MSS/combine_SBS96.txt" \
  --info_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
  --info_public_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
  --images_path "${SigProfiler_path}/decompose_allUSE_AllMut"

## Within NJMU: look at share, trunk and private,
## once per molecular subtype.
mol_type_list=("MSS" "MSI" "CIN" "GS")
for mol_type in "${mol_type_list[@]}"; do
  ${Rscript} "${scripts_path}/sigprofile/Sigprofiler_decompose_plot_v2.AllVcf.Share.R" \
    --input_file "${SigProfiler_path}/decompose_allUSE_AllMut/combine_SBS96.ratio.addInfo.tsv" \
    --mol_type "${mol_type}" \
    --images_path "${SigProfiler_path}/plot_allUSE_AllMut"

  ## IGC and DGC combined;
  ## shown overall and per data source.
  ${Rscript} "${scripts_path}/sigprofile/Sigprofiler_decompose_plot_v2.AllVcf.From.R" \
    --input_file "${SigProfiler_path}/decompose_allUSE_AllMut/combine_SBS96.ratio.addInfo.tsv" \
    --mol_type "${mol_type}" \
    --images_path "${SigProfiler_path}/plot_allUSE_AllMut"
done

## Signature composition of every individual.
${Rscript} "${scripts_path}/sigprofile/Sigprofiler_decompose_plot_v2.AllVcf.EverySample.R" \
  --input_file "${SigProfiler_path}/decompose_allUSE_AllMut/combine_SBS96.ratio.addInfo.tsv" \
  --images_path "${SigProfiler_path}/plot_allUSE_AllMut"

####################################################################################
## Copy-number comparison
## Original notes: MSI patients are excluded; this is the CNV burden.
## The plot script is run once per data source.
from_type=("All" "NJMU" "TCGA")
for from in "${from_type[@]}"; do
  ${Rscript} "${scripts_path}/plot/cnvBurden_plot.combine.R" \
    --from "${from}" \
    --sample_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
    --seg_file "${Titan_path}/Titan_all_seg.final.tsv" \
    --sample_public_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
    --seg_public_file "${work_dir}/seg_public/TCGA_use.seg" \
    --class_order_file "${config_path}/Class_order.list" \
    --images_path "${Images_path}/cnv_burden"
done

## Compare the different types of intestinal-metaplasia samples in NJMU.
${Rscript} "${scripts_path}/plot/cnvBurden_plot.IM.R" \
  --sample_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
  --seg_file "${Titan_path}/Titan_all_seg.final.tsv" \
  --class_order_file "${config_path}/Class_order.list" \
  --images_path "${Images_path}/cnv_burden"

## Chromosomal distribution of CNV
## (original note: changes of the two CNV subtypes in IGC and DGC, NJMU and TCGA).
${Rscript} "${scripts_path}/plot/cnvDistribution_plot.combine.R" \
  --sample_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
  --seg_file "${Titan_path}/Titan_all_seg.final.tsv" \
  --sample_public_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
  --seg_public_file "${work_dir}/seg_public/TCGA_use.seg" \
  --class_order_file "${config_path}/Class_order.list" \
  --images_path "${Images_path}/cnv_burden"


####################################################################################
## Driver-gene identification
####################################################################################
## Identify significantly mutated genes separately for IGC, DGC and IM.
## class_type is not assigned anywhere above in this file (presumably it comes
## from the sourced config); since its exact form cannot be seen here, the
## expansion is deliberately left unquoted so a whitespace-separated value
## would still be split into one class per iteration.
# shellcheck disable=SC2068
for class in ${class_type[@]}; do
  echo "${class}"
  sh "${scripts_path}/mutsig/Mutsig_3_mutsigcv.sh" "${class}"
done

## Identify significantly mutated genes with all GC samples pooled.
sh "${scripts_path}/mutsig/Mutsig_3_mutsigcv.GC.sh"

####################################################################################
##################################### Mutation rate of all genes: IM, IGC, DGC, GC
## Output per the original note: MutRate.tsv
${Rscript} "${scripts_path}/plot/mutRate_compute.R" \
  --maf_cancer_file "${maf_public_path}/All_use.maf" \
  --maf_im_file "${maf_public_path}/All_use.IM.maf" \
  --images_path "${Images_path}/mutRate" \
  --info_file "${work_dir}/public_ref/combine/MutationInfo.combine.tsv"

## Mutation rate of all recurrently mutated sites: IM, IGC, DGC, GC.
${Rscript} "${scripts_path}/plot/mutRate_compute.RecurrentPoint.R" \
  --maf_cancer_file "${maf_public_path}/All_use.maf" \
  --maf_im_file "${maf_public_path}/All_use.IM.maf" \
  --images_path "${Images_path}/mutRate" \
  --info_file "${work_dir}/public_ref/combine/MutationInfo.combine.tsv"

## Mutation rate of IM for the different subtypes.
${Rscript} "${scripts_path}/plot/mutRate_compute.IM.R" \
  --maf_file "${maf_path}/All_GGA.all.maf" \
  --images_path "${Images_path}/mutRate" \
  --info_file "${config_path}/tumor_normal.class.list"

## Mutation rate of all recurrently mutated sites, IM of the different
## gastric-cancer subtypes.
${Rscript} "${scripts_path}/plot/mutRate_compute.RecurrentPoint.IM.R" \
  --maf_file "${maf_path}/All_GGA.all.maf" \
  --images_path "${Images_path}/mutRate" \
  --info_file "${config_path}/tumor_normal.class.list"

##################################### Mutation rate of all genes: IM, IGC, DGC, GC
## Split by population (race).
## Output per the original note: MutRate.tsv
${Rscript} "${scripts_path}/plot/mutRate_compute.race.R" \
  --maf_cancer_file "${maf_public_path}/All_use.maf" \
  --maf_im_file "${maf_public_path}/All_use.IM.maf" \
  --maf_cancer_msi_file "${maf_public_path}/All_use.msi.maf" \
  --maf_im_msi_file "${maf_public_path}/All_use.IM.msi.maf" \
  --images_path "${Images_path}/mutRate" \
  --info_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.Race.tsv"

## Compare the mutation rates between populations.
## NOTE(review): ${mutsig_check_path}/smg.list is (re)written further down in
## this file; on a fresh top-to-bottom run it may not exist yet at this point
## - confirm the intended order.
${Rscript} "${scripts_path}/plot/mutRate_plot.race.R" \
  --smg_file "${mutsig_check_path}/smg.list" \
  --mut_rate_gene_file "${Images_path}/mutRate/MutRate.race.tsv" \
  --images_path "${Images_path}/mutRatePlot"

####################################################################################
##################################### Mutation rate of all genes: IM, IGC, DGC, GC
##################################### (per molecular subtype)
## Output per the original note: MutRate.molType.tsv
${Rscript} "${scripts_path}/plot/mutRate_compute.molType.R" \
  --maf_cancer_file "${maf_public_path}/All_use.maf" \
  --maf_im_file "${maf_public_path}/All_use.IM.maf" \
  --maf_cancer_msi_file "${maf_public_path}/All_use.msi.maf" \
  --maf_im_msi_file "${maf_public_path}/All_use.IM.msi.maf" \
  --images_path "${Images_path}/mutRate" \
  --info_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv"

## IM of the different subtypes.
## Output per the original note: MutRate.molType.IM.tsv
${Rscript} "${scripts_path}/plot/mutRate_compute.molType.IM.R" \
  --maf_im_file "${maf_public_path}/All_use.IM.maf" \
  --maf_im_msi_file "${maf_public_path}/All_use.IM.msi.maf" \
  --images_path "${Images_path}/mutRate" \
  --info_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv"

${Rscript} "${scripts_path}/plot/mutRate_compute.RecurrentPoint.molType.R" \
  --maf_cancer_file "${maf_public_path}/All_use.maf" \
  --maf_im_file "${maf_public_path}/All_use.IM.maf" \
  --maf_cancer_msi_file "${maf_public_path}/All_use.msi.maf" \
  --maf_im_msi_file "${maf_public_path}/All_use.IM.msi.maf" \
  --images_path "${Images_path}/mutRate" \
  --info_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv"

## Disabled mutation-rate steps, kept for reference. The here-doc is bound to
## ':' with a quoted delimiter so its text is discarded without any expansion.
: <<'EOF'
## 所有多次出现突变位点的突变率的计算，IM、IGC、DGC、GC
${Rscript} ${scripts_path}/plot/mutRate_compute.RecurrentPoint.R \
--maf_cancer_file ${maf_public_path}/All_use.maf \
--maf_im_file ${maf_public_path}/All_use.IM.maf \
--images_path ${Images_path}/mutRate \
--info_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv

## 不同亚型的IM突变率的计算
${Rscript} ${scripts_path}/plot/mutRate_compute.IM.R \
--maf_file ${maf_path}/All_GGA.all.maf \
--images_path ${Images_path}/mutRate \
--info_file ${config_path}/tumor_normal.class.list

## 所有多次出现突变位点的突变率的计算，不同胃癌亚型的IM
${Rscript} ${scripts_path}/plot/mutRate_compute.RecurrentPoint.IM.R \
--maf_file ${maf_path}/All_GGA.all.maf \
--images_path ${Images_path}/mutRate \
--info_file ${config_path}/tumor_normal.class.list
EOF

####################################################################################
## CCF analysis of shared and private functional mutations in the IM of
## MSI, GS and CIN.
${Rscript} "${scripts_path}/plot/mutBurden.MolecularType.ShareRate.R" \
  --mutshare_file "${Images_path}/mutRate/MutShare.AllPoint.tsv" \
  --mutshare_msi_file "${Images_path}/mutRateMSI/MutShare.AllPoint.tsv" \
  --ccf_file "${MutationTime_path}/result/All_CCF_mutTime.tsv" \
  --ccf_msi_file "${MutationTime_path}/result/All_CCF_mutTime.MSI.tsv" \
  --input_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
  --images_path "${Images_path}/mutBurden"


####################################### Check the mutation rate of the significant
####################################### driver genes across the sample sources
## Output per the original note: ${mutsig_check_path}/driver.summary.tsv
## NOTE(review): "--reprot_file" looks like a misspelling of "report"; it is
## kept as-is because it has to match the option name the R script expects.
${Rscript} "${scripts_path}/mutsig/Mutsig_combineInfo.R" \
  --sig_igc_file "${MutsigOut_path}/IGC/sig_genes.txt" \
  --sig_dgc_file "${MutsigOut_path}/DGC/sig_genes.txt" \
  --sig_im_file "${MutsigOut_path}/IM/sig_genes.txt" \
  --reprot_file "${work_dir}/public_ref/SMG_sort.list" \
  --mutRate_file "${Images_path}/mutRate/MutRate.tsv" \
  --out_path "${mutsig_check_path}"

## Unreliable genes are removed by manual inspection
## (original note: for NMU the BAM files are checked).
# ${mutsig_check_path}

## Lists of reliable genes:
# ${mutsig_check_path}/cancer/cancer.smg.list
# ${mutsig_check_path}/precancer/precancer.smg.list
## Merge them into one file: a "Gene_Symbol" header line followed by the
## sorted, de-duplicated lines of the three lists (lines containing
## "Gene_Symbol" are dropped from the inputs).
{
  echo "Gene_Symbol"
  cat "${mutsig_check_path}/igc_smg.list" \
    "${mutsig_check_path}/dgc_smg.list" \
    "${mutsig_check_path}/im_smg.list" \
    | grep -v Gene_Symbol \
    | sort -u
} > "${mutsig_check_path}/smg.list"

##################################### Mutation waterfall plot
## Shows the mutation rate of the identified driver genes.
## Original note: a "*" on the left marks genes identified in Tumor, a "*" on
## the right marks genes identified in IM.

${Rscript} "${scripts_path}/plot/waterfull_smg.new.R" \
  --maf_path "${maf_public_path}" \
  --images_path "${Images_path}" \
  --info_file "${config_path}/tumor_normal.class.list" \
  --info_public_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
  --class_order_file "${config_path}/Class_order.list" \
  --im_list "${mutsig_check_path}/im_smg.list" \
  --igc_list "${mutsig_check_path}/igc_smg.list" \
  --dgc_list "${mutsig_check_path}/dgc_smg.list"

## Driver-gene mutation rate in IM vs GC and IGC vs DGC.
## The per-source loop is commented out; only "All" is run.
#for from in ${from_type[@]}
#do
from=All
${Rscript} "${scripts_path}/plot/mutRate_plot.All.R" \
  --smg_file "${mutsig_check_path}/smg.list" \
  --mut_rate_gene_file "${Images_path}/mutRate/MutRate.tsv" \
  --from "${from}" \
  --images_path "${Images_path}/mutRatePlot"
#done

class_type_list=("IM" "IGC" "DGC")
for class_type in "${class_type_list[@]}"; do
  ## Compare the effect of drinking / HP on the driver-gene mutation rate.
  ${Rscript} "${scripts_path}/plot/CompareDriver.grid.R" \
    --smg_list "${mutsig_check_path}/smg.list" \
    --input_file "${MutationTime_path}/result/All_CCF_mutTime.tsv" \
    --base_line_file "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
    --class_type "${class_type}" \
    --out_path "${Images_path}/CompareGeneList"
done

## Co-occurrence and mutual exclusivity.
for class_type in "${class_type_list[@]}"; do
  ## Compute.
  ${Rscript} "${scripts_path}/plot/DriverGene.Exclusive.R" \
    --maf_path "${maf_public_path}" \
    --class_type "${class_type}" \
    --images_path "${Images_path}/Con_Exclusive" \
    --info_file "${config_path}/tumor_normal.class.list" \
    --info_public_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
    --class_order_file "${config_path}/Class_order.list" \
    --im_list "${mutsig_check_path}/im_smg.list" \
    --igc_list "${mutsig_check_path}/igc_smg.list" \
    --dgc_list "${mutsig_check_path}/dgc_smg.list"
  ## Plot.
  ${Rscript} "${scripts_path}/plot/heatMap.con_exclusive.R" \
    --class_type "${class_type}" \
    --input_file "${Images_path}/Con_Exclusive/MutuallyExclusive.${class_type}.tsv" \
    --images_path "${Images_path}/Con_Exclusive"
done

## Effect of gene mutation on the mutation burden, once per gene in smg.list.
## The command substitution is word-split on purpose: every whitespace-separated
## token of the list (minus lines containing "Gene_Symbol") is one gene.
for gene in $(grep -v Gene_Symbol "${mutsig_check_path}/smg.list" | sort -u); do
  ${Rscript} "${scripts_path}/plot/mutBurden_plot.DriverGene.R" \
    --gene "${gene}" \
    --input_file "${baseTable_path}/STAD_Info.addBurden.MSI_MSS.tsv" \
    --ccf_file "${MutationTime_path}/result/All_CCF_mutTime.tsv" \
    --maf_public_file "${work_dir}/maf_public/All_use.addVAF.maf" \
    --sample_public_file "${work_dir}/public_ref/combine/MutationInfo.combine.tsv" \
    --images_path "${Images_path}/mutBurdenDriverGene"
done

## Disabled step, kept for reference: association of CFTR with HP and
## drinking. The here-doc is bound to ':' with a quoted delimiter so its text
## is discarded without any expansion.
: <<'EOF'
## CFTR与HP和饮酒的关联
gene=CFTR
${Rscript} ${scripts_path}/plot/mutBaseline_plot.R \
--gene ${gene} \
--info_file ${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv \
--ccf_file ${MutationTime_path}/result/All_CCF_mutTime.tsv \
--images_path ${Images_path}/mutBaselinePlot/${gene}
EOF

## Settings handed to JudgeGeneDriverSharePrivate.R via --clone_t / --choose_rate.
clone_t=0.6
choose_rate=0
class_type_list=("All" "IGC" "DGC" "IGC_DGC")
for class_type in "${class_type_list[@]}"; do
  ## Shared versus private status of the driver genes.
  ${Rscript} "${scripts_path}/plot/JudgeGeneDriverSharePrivate.R" \
    --muti_cancer "${maf_path}/All_GGA.cancer.maf" \
    --muti_pre "${maf_path}/All_GGA.precancer.maf" \
    --ccf_file "${MutationTime_path}/result/All_CCF_mutTime.tsv" \
    --gene_list "${mutsig_check_path}/smg.list" \
    --sample_info "${config_path}/tumor_normal.class.list" \
    --type "${class_type}" \
    --clone_t "${clone_t}" \
    --choose_rate "${choose_rate}" \
    --out_path "${Images_path}/GeneShare"

  ## Second-hit phenomenon for shared and private driver genes.
  ## Reads the per-class Driver_Trunk.evolutionChoose table from the GeneShare
  ## directory that the previous call was given as its output path.
  ${Rscript} "${scripts_path}/plot/JudgeGeneDriverSharePrivateLOH.R" \
    --muti_cancer "${maf_path}/All_GGA.cancer.maf" \
    --muti_pre "${maf_path}/All_GGA.precancer.maf" \
    --ccf_file "${MutationTime_path}/result/All_CCF_mutTime.tsv" \
    --gene_list "${mutsig_check_path}/smg.list" \
    --sample_info "${config_path}/tumor_normal.class.list" \
    --type "${class_type}" \
    --sample_class_file "${Images_path}/GeneShare/Driver_Trunk.evolutionChoose.${class_type}.tsv" \
    --out_path "${Images_path}/GeneShare"
done

## Disabled driver-gene LOH / expression / CCF / subtype-rate / reported-SMG
## steps, kept for reference. The here-doc is bound to ':' with a quoted
## delimiter so its text is discarded without any expansion.
: <<'EOF'
class_type_list=("All" "IGC" "DGC" )
for class_type in ${class_type_list[@]}
do
## 判断IM和GC共享以及GC中发生在早期的克隆突变其LOH相比于其它突变的情况
${Rscript} ${scripts_path}/plot/JudgeGeneDriverLOH.R \
--ccf_file ${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv \
--gene_list ${mutsig_check_path}/smg.list  \
--sample_info ${config_path}/tumor_normal.class.list \
--type ${class_type} \
--out_path ${Images_path}/GeneLOH
done

## 判断IM和GC共享以及GC中发生在早期的克隆突变其表达相比于其它突变的情况
${Rscript} ${scripts_path}/plot/JudgeGeneDriverExpression.R \
--ccf_file ${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv \
--gene_list ${mutsig_check_path}/smg.list  \
--sample_info ${config_path}/tumor_normal.class.list \
--type ${class_type} \
--out_path ${Images_path}/GeneExression \
--rsem_file ${mRNA_path}/CombineTMM.DNAUse.MergeMutiSample.tsv \
--gtf_file ${ref_path}/GTF/gencode.v19.ensg_genename.txt \


## 判断IM和GC共享以及GC中发生的克隆突变其CCF改变如何
${Rscript} ${scripts_path}/plot/JudgeGeneDriverCCF.R \
--ccf_file ${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv \
--gene_list ${mutsig_check_path}/smg.list  \
--sample_info ${config_path}/tumor_normal.class.list \
--type ${class_type} \
--out_path ${Images_path}/GeneCCF 

done

## 驱动基因突变率在不同亚型
mol_type_list=("MSI" "CIN" "GS")
for mol_type in ${mol_type_list[@]}
do
${Rscript} ${scripts_path}/plot/mutRate_plot.MolType.R \
--smg_file ${mutsig_check_path}/smg.list \
--mut_rate_gene_file ${Images_path}/mutRate/MutRate.molType.tsv \
--mol_type ${mol_type} \
--images_path ${Images_path}/mutRatePlot
done

## 所有已报道胃癌驱动基因突变情况
## 出现在至少5个样本
${Rscript} ${scripts_path}/plot/waterfull_smg.reportSMG.R \
--maf_path ${maf_path} \
--images_path ${Images_path} \
--info_file ${config_path}/tumor_normal.class.list \
--class_order_file ${config_path}/Class_order.list \
--smg_list ${work_dir}/public_ref/SMG_sort.list 
EOF

##################################### Mutation distribution of the key genes
## Genes that promote IM progression: entries of smg.list that match
## MUC6, BMP6 or CFTR. The command substitution is word-split on purpose
## (one whitespace-free gene symbol per iteration).
for gene in $(grep -v Gene_Symbol "${mutsig_check_path}/smg.list" | sort -u | grep -E "MUC6|BMP6|CFTR"); do

  ## Across all samples: do IGC and DGC differ?
  ## Original note: the 5 samples with IM + IGC + DGC are not included.
  ${Rscript} "${scripts_path}/plot/Lollipop_variant.IGC_DGC.R" \
    --gene "${gene}" \
    --sample_public_info "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
    --cancer_file "${work_dir}/maf_public/All_use.addVAF.maf" \
    --cancer_nmu_file "${maf_path}/All_GGA.cancer.maf" \
    --sample_info "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
    --gtf_file "${ref_path}/GTF/gencode.v19.annotation.exonNum.gtf" \
    --out_path "${Images_path}/lollipop/${gene}"

  #### Mutation lollipop plot
  ## Within the NMU samples: do the mutations of paired IM and its IGC/DGC
  ## differ?
  ${Rscript} "${scripts_path}/plot/Lollipop_variant.R" \
    --gene "${gene}" \
    --pre_file "${maf_path}/All_GGA.precancer.maf" \
    --cancer_file "${maf_path}/All_GGA.cancer.maf" \
    --sample_info "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
    --gtf_file "${ref_path}/GTF/gencode.v19.annotation.exonNum.gtf" \
    --out_path "${Images_path}/lollipop/${gene}_NMU"
done

## How much of IM the three genes can explain;
## co-occurrence and mutual exclusivity.
${Rscript} "${scripts_path}/plot/waterfull_smg.IM.R" \
  --maf_path "${maf_public_path}" \
  --images_path "${Images_path}" \
  --info_file "${config_path}/tumor_normal.class.list" \
  --info_public_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
  --class_order_file "${config_path}/Class_order.list"


##################################### Mutation features of the key genes
## Runs the full set of per-gene plots for every gene in smg.list.

## Molecular subtypes used by the inner loop; loop-invariant, so set once.
mol_type_list=("GS" "CIN")

## The command substitution is word-split on purpose: every whitespace-separated
## token of the list (minus lines containing "Gene_Symbol") is one gene.
for gene in $(grep -v Gene_Symbol "${mutsig_check_path}/smg.list" | sort -u); do

  ##################
  ## IGC versus DGC
  ##################

  ## Mutation-rate difference between IGC and DGC, overall and per source;
  ## also IM versus GC.
  ${Rscript} "${scripts_path}/plot/mutRate_plot.IGC_DGC.R" \
    --gene "${gene}" \
    --mut_rate_gene_file "${Images_path}/mutRate/MutRate.tsv" \
    --mut_rate_point_file "${Images_path}/mutRate/MutRate.RecurrentPoint.tsv" \
    --images_path "${Images_path}/mutRatePlot/${gene}"

  ## Per molecular subtype.
  ${Rscript} "${scripts_path}/plot/mutRate_plot.IGC_DGC.molType.R" \
    --gene "${gene}" \
    --mut_rate_gene_file "${Images_path}/mutRate/MutRate.molType.tsv" \
    --mut_rate_point_file "${Images_path}/mutRate/MutRate.RecurrentPoint.molType.tsv" \
    --images_path "${Images_path}/mutRatePlot/${gene}"

  ## Across all samples: do IGC and DGC differ?
  ## Original note: the 5 samples with IM + IGC + DGC are not included.
  ${Rscript} "${scripts_path}/plot/Lollipop_variant.IGC_DGC.R" \
    --gene "${gene}" \
    --sample_public_info "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
    --cancer_file "${work_dir}/maf_public/All_use.addVAF.maf" \
    --cancer_nmu_file "${maf_path}/All_GGA.cancer.maf" \
    --sample_info "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
    --gtf_file "${ref_path}/GTF/gencode.v19.annotation.exonNum.gtf" \
    --out_path "${Images_path}/lollipop/${gene}"

  ##################
  ## IM comparisons
  ##################

  #### Mutation rate of IM across the lesion subtypes,
  ## for the whole gene and for recurrent mutation sites.
  ${Rscript} "${scripts_path}/plot/mutRate_plot.IM.v2.R" \
    --gene "${gene}" \
    --info_file "${config_path}/tumor_normal.class.list" \
    --mut_rate_gene_file "${Images_path}/mutRate/MutRate.IM.tsv" \
    --mut_rate_point_file "${Images_path}/mutRate/MutRate.RecurrentPoint.IM.tsv" \
    --images_path "${Images_path}/mutRatePlot/${gene}"

  #### Split into three subtypes and compare IM with IGC or DGC in each,
  ## for the whole gene and for recurrent mutation sites.
  ${Rscript} "${scripts_path}/plot/mutRate_plot.divide.R" \
    --gene "${gene}" \
    --info_file "${config_path}/tumor_normal.class.list" \
    --mut_rate_gene_file "${Images_path}/mutRate/MutRate.Type.tsv" \
    --mut_rate_point_file "${Images_path}/mutRate/MutRate.RecurrentPoint.Type.tsv" \
    --images_path "${Images_path}/mutRatePlot/${gene}"

  ## Different intestinal-metaplasia subtypes.
  ## Example output name from the original note: MutRate_TP53.IM.molType.pdf
  ${Rscript} "${scripts_path}/plot/mutRate_plot.IM.molType.R" \
    --gene "${gene}" \
    --mut_rate_gene_file "${Images_path}/mutRate/MutRate.molType.IM.tsv" \
    --images_path "${Images_path}/mutRatePlot/${gene}"

  #### Mutation lollipop plot
  ## Within the NMU samples: do the mutations of paired IM and its IGC/DGC
  ## differ?
  ${Rscript} "${scripts_path}/plot/Lollipop_variant.R" \
    --gene "${gene}" \
    --pre_file "${maf_path}/All_GGA.precancer.maf" \
    --cancer_file "${maf_path}/All_GGA.cancer.maf" \
    --sample_info "${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
    --gtf_file "${ref_path}/GTF/gencode.v19.annotation.exonNum.gtf" \
    --out_path "${Images_path}/lollipop/${gene}_NMU"

  ##################
  ## Per gastric-cancer molecular subtype
  ##################
  for mol_type in "${mol_type_list[@]}"; do
    ${Rscript} "${scripts_path}/plot/mutBurden_plot.DriverGene.molType.R" \
      --gene "${gene}" \
      --mol_type "${mol_type}" \
      --input_file "${baseTable_path}/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv" \
      --ccf_file "${MutationTime_path}/result/All_CCF_mutTime.tsv" \
      --maf_public_file "${work_dir}/maf_public/All_use.addVAF.maf" \
      --sample_public_file "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.tsv" \
      --images_path "${Images_path}/mutBurdenDriverGene"
  done

  ######### Within the NMU samples
  #### Sharing between precancer and cancer.
  ${Rscript} "${scripts_path}/plot/mutShare_plot.R" \
    --gene "${gene}" \
    --mut_share_file "${Images_path}/mutRate/MutShare.tsv" \
    --mut_share_point_file "${Images_path}/mutRate/MutShare.RecurrentPoint.tsv" \
    --images_path "${Images_path}/mutRatePlot"

  #### How the CCF of the gene changes between samples.
  ## Box plot.
  ${Rscript} "${scripts_path}/plot/mutCCF_plot.R" \
    --gene "${gene}" \
    --info_file "${config_path}/STAD-useCombine.Sample.tsv" \
    --ccf_file "${MutationTime_path}/result/All_CCF_mutTime.tsv" \
    --images_path "${Images_path}/mutCCF"

  ## Bar plot. Reads the ${gene}.AllInfo.tsv table from the ${gene}_NMU
  ## lollipop directory used as output path by Lollipop_variant.R above.
  ${Rscript} "${scripts_path}/plot/mutCCF_plot.bar.R" \
    --gene "${gene}" \
    --info_file "${config_path}/STAD-useCombine.Sample.tsv" \
    --class_sub_file "${config_path}/Class_order_sub.list" \
    --lollipop_file "${Images_path}/lollipop/${gene}_NMU/${gene}.AllInfo.tsv" \
    --ccf_file "${MutationTime_path}/result/All_CCF_mutTime.tsv" \
    --images_path "${Images_path}/mutCCF"

  #### Relationship of key driver-gene mutations with sex, smoking, drinking, HP.
  ## NOTE(review): this is the only step that reads
  ## STAD_Info.addBurden.MSI_MSS.tsv from ${config_path}; the other steps in
  ## this file read it from ${baseTable_path} or ${work_dir}/baseTable -
  ## confirm the directory. The path is left unchanged here.
  ${Rscript} "${scripts_path}/plot/mutBaseline_plot.R" \
    --gene "${gene}" \
    --info_file "${config_path}/STAD_Info.addBurden.MSI_MSS.tsv" \
    --ccf_file "${MutationTime_path}/result/All_CCF_mutTime.tsv" \
    --images_path "${Images_path}/mutBaselinePlot/${gene}"

  #### Gene expression
  ## Overall expression, NMU samples.
  ${Rscript} "${scripts_path}/mRNA/showGene.Normalize.R" \
    --sample_list_file "${config_path}/tumor_normal.class.list" \
    --rsem_file "${mRNA_path}/CombineTMM.DNAUse.MergeMutiSample.tsv" \
    --out_path "${Images_path}/expression_nmu" \
    --gtf_file "${ref_path}/GTF/gencode.v19.ensg_genename.txt" \
    --gene "${gene}"

  ## Overall, TCGA and NJMU;
  ## Normal, IM, IGC, DGC.
  ${Rscript} "${scripts_path}/mRNA/showGene.Normalize.oneImage.R" \
    --sample_list_file "${work_dir}/public_ref/combine/MutationInfo.combine.tsv" \
    --rsem_file "${mRNA_path}/CombineTMM.DNAUse.NJMU_TCGA.MergeMutiSample.tsv" \
    --out_path "${Images_path}/expression/${gene}" \
    --gene "${gene}"

  ## Expression in mutated versus non-mutated samples.
  ${Rscript} "${scripts_path}/mRNA/showGene.Normalize.MutvsWild.R" \
    --sample_list_file "${config_path}/tumor_normal.class.list" \
    --sample_list_public_file "${work_dir}/public_ref/combine/MutationInfo.combine.tsv" \
    --rsem_file "${mRNA_path}/CombineTMM.DNAUse.NJMU_TCGA.tsv" \
    --maf_public_file "${work_dir}/maf_public/All_use.addVAF.maf" \
    --out_path "${Images_path}/expression/${gene}" \
    --gene "${gene}"

done

####################################################################################
## For genes with recurrent mutations: effect of the recurrent mutations on
## expression and on the mutation burden.

gene_list=(MUC6)
## mut_type is not used in this loop; it is read by the scissor step further
## down in this file.
mut_type=(all hotpot nonhotpot)
for gene in "${gene_list[@]}"; do
  ${Rscript} "${scripts_path}/mRNA/showGene.Normalize.MutvsWild.MUC6.R" \
    --sample_list_file "${config_path}/tumor_normal.class.list" \
    --sample_list_public_file "${work_dir}/public_ref/combine/MutationInfo.combine.tsv" \
    --rsem_file "${mRNA_path}/CombineTMM.DNAUse.NJMU_TCGA.tsv" \
    --maf_public_file "${work_dir}/maf_public/All_use.addVAF.maf" \
    --out_path "${Images_path}/expression/${gene}" \
    --gene "${gene}"

  ${Rscript} "${scripts_path}/plot/mutBurden_plot.DriverGene.Recurrent.R" \
    --gene "${gene}" \
    --input_file "${baseTable_path}/STAD_Info.addBurden.MSI_MSS.tsv" \
    --ccf_file "${MutationTime_path}/result/All_CCF_mutTime.tsv" \
    --maf_public_file "${work_dir}/maf_public/All_use.addVAF.maf" \
    --sample_public_file "${work_dir}/public_ref/combine/MutationInfo.combine.tsv" \
    --images_path "${Images_path}/mutBurdenDriverGene"
done

## Use scissor to explore the relationship between mutations and cells.
## Disabled variant (TMM matrix), kept for reference. The here-doc is bound to
## ':' with a quoted delimiter so its text is discarded without any expansion.
: <<'EOF'
export gene=MUC6
echo ${mut_type[@]} | tr ' ' '\n'| xargs -P 3 -i sh -c '
${Rscript_singlecell} ${scripts_path}/singlecell/scissor_cell.R \
--sample_list_file ${config_path}/tumor_normal.class.list \
--sample_list_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
--rsem_file ${mRNA_path}/CombineTMM.DNAUse.NJMU_TCGA.tsv \
--maf_public_file ${work_dir}/maf_public/All_use.addVAF.maf \
--single_cell_file ${Images_path}/singleCell/epi_nor_PCA_50_RE0.5.Rdata \
--out_path ${Images_path}/singleCell/${gene} \
--mut_type {} \
--gene ${gene}
'
EOF

#############################
## Based on the public database
## Try TPM
## Use this one as the standard
export gene=MUC6
# One scissor run per mutation class, at most two in parallel.
# The child sh only sees exported variables, hence the export of gene above;
# the other variables are presumably exported by the sourced config - confirm.
# -I {} replaces the deprecated GNU-only "-i" spelling.
printf '%s\n' "${mut_type[@]}" | xargs -P 2 -I {} sh -c '
${Rscript_singlecell} ${scripts_path}/singlecell/scissor_cell.R \
--sample_list_file ${config_path}/tumor_normal.class.list \
--sample_list_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
--rsem_file ${mRNA_path}/CombineTPM.DNAUse.NJMU_TCGA.tsv \
--maf_public_file ${work_dir}/maf_public/All_use.addVAF.maf \
--single_cell_file ${Images_path}/singleCell/epi_nor_PCA_50_RE0.5.Rdata \
--out_path ${Images_path}/singleCell_TPM/${gene} \
--mut_type {} \
--gene ${gene}
'
## Violin plot of MUC6 expression
${Rscript_singlecell} ${scripts_path}/singlecell/showGene.vln.R \
  --single_cell_file ${Images_path}/singleCell/epi_nor_PCA_50_RE0.5.Rdata \
  --gene ${gene} \
  --out_path ${Images_path}/singleCell_TPM

## Based on the public database
## All SMGs
# Gene symbols come from smg.list (header line dropped), de-duplicated, in reverse order.
for gene in $(grep -v Gene_Symbol ${mutsig_check_path}/smg.list | sort -u -r); do
  ${Rscript_singlecell} ${scripts_path}/singlecell/scissor_cell.R \
    --sample_list_file ${config_path}/tumor_normal.class.list \
    --sample_list_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
    --rsem_file ${mRNA_path}/CombineTPM.DNAUse.NJMU_TCGA.tsv \
    --maf_public_file ${work_dir}/maf_public/All_use.addVAF.maf \
    --single_cell_file ${Images_path}/singleCell/epi_nor_PCA_50_RE0.5.Rdata \
    --out_path ${Images_path}/singleCell_TPM/${gene} \
    --mut_type all \
    --gene ${gene}
done
#############################
## Based on our own data
mut_type=(all hotpot nonhotpot)

export gene=MUC6
# One scissor run per mutation class, run sequentially (-P 1).
# The child sh only sees exported variables, hence the export of gene above;
# the other variables are presumably exported by the sourced config - confirm.
# -I {} replaces the deprecated GNU-only "-i" spelling.
printf '%s\n' "${mut_type[@]}" | xargs -P 1 -I {} sh -c '
${Rscript_singlecell} ${scripts_path}/singlecell/scissor_cell_OurData.R \
--sample_list_file ${config_path}/tumor_normal.class.list \
--sample_list_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
--rsem_file ${mRNA_path}/CombineTPM.DNAUse.NJMU_TCGA.tsv \
--maf_public_file ${work_dir}/maf_public/All_use.addVAF.maf \
--single_cell_file ${work_dir}/public_ref/singleCell/njmu/epiall_nor_PCA_50_RE0.5.Rdata \
--out_path ${Images_path}/singleCell_TPM_ourData/${gene} \
--mut_type {} \
--gene ${gene}
'

## All SMGs
# Gene symbols come from smg.list (header line dropped), de-duplicated, in reverse order.
for gene in $(grep -v Gene_Symbol ${mutsig_check_path}/smg.list | sort -u -r); do
  ${Rscript_singlecell} ${scripts_path}/singlecell/scissor_cell_OurData.R \
    --sample_list_file ${config_path}/tumor_normal.class.list \
    --sample_list_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
    --rsem_file ${mRNA_path}/CombineTPM.DNAUse.NJMU_TCGA.tsv \
    --maf_public_file ${work_dir}/maf_public/All_use.addVAF.maf \
    --single_cell_file ${work_dir}/public_ref/singleCell/njmu/epiall_nor_PCA_50_RE0.5.Rdata \
    --out_path ${Images_path}/singleCell_TPM_ourData/${gene} \
    --mut_type all \
    --gene ${gene}
done

## Disabled block, kept for reference only. The here-document is fed to the
## no-op ':' and its delimiter is quoted so the shell expands nothing inside it.
: <<'EOF'
gene=MUC6
## 只画scissor+和-的细胞的counts
${Rscript_singlecell} ${scripts_path}/singlecell/scissor_cell_OurData.Counts.R \
--input_im_file ${Images_path}/singleCell_TPM_ourData/${gene}/Scissor_STAD_MUC6_mutation.IM.CellRate.all.tsv \
--out_path ${Images_path}/singleCell_TPM_ourData/${gene} \
--type all \
--gene ${gene}

## MUC6细胞比例为10%
## 考虑到IM中MUC6的CCF占比极低
${Rscript_singlecell} ${scripts_path}/singlecell/scissor_cell_OurData.Counts.R \
--input_im_file ${Images_path}/singleCell_TPM_ourData/${gene}_10/Scissor_STAD_MUC6_mutation.IM.CellRate.all.tsv \
--out_path ${Images_path}/singleCell_TPM_ourData/${gene}_10 \
--type all \
--gene ${gene}

## 拟时序分析，基于已经推测好的演化时间同时细胞数量的图去除背景细胞
${Rscript_singlecell} ${scripts_path}/singlecell/MutSig_Monocle.plot.R \
--input_im_file ${Images_path}/singleCell_TPM_ourData/${gene}/Scissor_STAD_MUC6_mutation.IM.CellRate.all.RData \
--input_igc_file ${Images_path}/singleCell_TPM_ourData/${gene}/Scissor_STAD_MUC6_mutation.IGC.CellRate.all.RData \
--input_dgc_file ${Images_path}/singleCell_TPM_ourData/${gene}/Scissor_STAD_MUC6_mutation.DGC.CellRate.all.RData \
--igc_mono_file ${work_dir}/public_ref/singleCell/njmu/IGC_DEG_MONO800.Rdata \
--dgc_mono_file ${work_dir}/public_ref/singleCell/njmu/DGC_DEG_MONO800.Rdata \
--out_path ${Images_path}/singleCell_TPM_ourData/${gene}_monocle \
--type all \
--gene ${gene}
EOF

## Disabled block, kept for reference only. With an unquoted delimiter the
## backtick pipelines inside the here-document would still be executed by the
## shell; quoting the delimiter (and feeding it to ':') makes the text inert.
: <<'EOF'
#############################
## 所有细胞类型的数据
## 成纤维细胞
for gene in `cat ${mutsig_check_path}/smg.list | grep -v Gene_Symbol | sort -u -r | grep -E "CDH1|RHOA|TP53|ARID1A|MUC6" `
do
${Rscript_singlecell} ${scripts_path}/singlecell/scissor_cell_OurData.fibroblasts.R \
--sample_list_file ${config_path}/tumor_normal.class.list \
--sample_list_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
--rsem_file ${mRNA_path}/CombineTPM.DNAUse.NJMU_TCGA.tsv \
--maf_public_file ${work_dir}/maf_public/All_use.addVAF.maf \
--single_cell_file ${work_dir}/public_ref/singleCell/njmu/fibroblasts_all_PCA_50_RE1.Rdata \
--out_path ${Images_path}/singleCell_TPM_ourData_fibroblasts/${gene} \
--mut_type all \
--gene ${gene}
done

## 只看DGC
for gene in `cat ${mutsig_check_path}/dgc_smg.list | grep -v Gene_Symbol | sort -u -r  `
do
${Rscript_singlecell} ${scripts_path}/singlecell/scissor_cell_OurData.onlyDGC.R \
--sample_list_file ${config_path}/tumor_normal.class.list \
--sample_list_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
--rsem_file ${mRNA_path}/CombineTPM.DNAUse.NJMU_TCGA.tsv \
--maf_public_file ${work_dir}/maf_public/All_use.addVAF.maf \
--single_cell_file ${work_dir}/public_ref/singleCell/njmu/epiall_nor_PCA_50_RE0.5.Rdata \
--out_path ${Images_path}/singleCell_TPM_ourData_DGC/${gene} \
--mut_type all \
--gene ${gene}
done
EOF

## Compare cell-proportion differences between mutant and wild-type samples
## (originally written for MUC6, now run for each gene in gene_list)
gene_list=("MUC6" "CFTR" "BMP6")
for gene in "${gene_list[@]}"; do
  ${Rscript_singlecell} ${scripts_path}/singlecell/compare_CellRatio.R \
    --sample_list_file ${config_path}/tumor_normal.class.list \
    --singleCell_sample_file ${config_path}/singleCell_Sample.list \
    --maf_file ${maf_path}/All_GGA.all.maf \
    --maf_msi_file ${maf_path}/All_GGA.all.MSI.maf \
    --single_cell_file ${work_dir}/public_ref/singleCell/njmu/epiall_nor_PCA_50_RE0.5.Rdata \
    --out_path ${Images_path}/singleCellRatio \
    --gene ${gene}
done

## Within samples carrying a MUC6 mutation, find which cells the MUC6 mutation is associated with
# The loop above leaves gene set to its last element (BMP6); pin it back to
# MUC6 so that --out_path and --gene refer to the same gene.
# NOTE(review): later steps read ${Images_path}/singleCell_MUC6/Scissor_*.RData
# directly - confirm where this script actually writes its output.
gene=MUC6
${Rscript_singlecell} ${scripts_path}/singlecell/scissor_cell_OurData.MUC6.R \
  --sample_list_file ${config_path}/tumor_normal.class.list \
  --sample_list_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
  --rsem_file ${mRNA_path}/CombineTPM.DNAUse.NJMU_TCGA.tsv \
  --maf_public_file ${work_dir}/maf_public/All_use.addVAF.maf \
  --single_cell_file ${work_dir}/public_ref/singleCell/njmu/epiall_nor_PCA_50_RE0.5.Rdata \
  --out_path ${Images_path}/singleCell_MUC6/${gene} \
  --mut_type all \
  --gene ${gene}

gene_list=("MUC6" "CFTR" "BMP6")
for gene in "${gene_list[@]}"; do
  ## Expression of GKN1 and GKN2 in mutant vs wild-type samples of each gene
  ${Rscript} ${scripts_path}/mRNA/showGene.Normalize.MutvsWild.MUC6.GKN1_GKN2.R \
    --sample_list_file ${config_path}/tumor_normal.class.list \
    --sample_list_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
    --rsem_file ${mRNA_path}/CombineTMM.DNAUse.NJMU_TCGA.tsv \
    --maf_public_file ${work_dir}/maf_public/All_use.addVAF.maf \
    --out_path ${Images_path}/expression/${gene}_GKN1_GKN2 \
    --gene ${gene}
done

## Expression of DSC2 and DSG2 in MUC6-mutant vs wild-type samples
${Rscript} ${scripts_path}/mRNA/showGene.Normalize.MutvsWild.MUC6.DSC2_DSG2.R \
  --sample_list_file ${config_path}/tumor_normal.class.list \
  --sample_list_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
  --rsem_file ${mRNA_path}/CombineTMM.DNAUse.NJMU_TCGA.tsv \
  --maf_public_file ${work_dir}/maf_public/All_use.addVAF.maf \
  --out_path ${Images_path}/expression/MUC6_DSC2_DSG2 \
  --gene MUC6



## Differential expression and pathway enrichment, MUC6-mutant vs wild-type samples
## Pathway enrichment: GSEA added to look at up- and down-regulated pathways (not done yet)
## Cells are not filtered
## Only coding genes are included
gene=MUC6
${Rscript_singlecell} ${scripts_path}/singlecell/differexpression.MUC6.R \
  --single_cell_file ${Images_path}/singleCell_MUC6/Scissor_STAD_MUC6_mutation.IM.CellRate.all.RData \
  --gene ${gene} \
  --cds_file ~/ref/PCAWG_Elements/web_hg19/gc19_pc.cds.use.bed \
  --pathway_path ~/ref/Pathway/ \
  --out_path ${Images_path}/singleCell_MUC6/Diff
## GSEA
${Rscript_gsea} ${scripts_path}/singlecell/gsea.MUC6.R \
  --input_file ${Images_path}/singleCell_MUC6/Diff/DiffGene.Pit.tsv \
  --pathway_path ~/ref/Pathway/ \
  --out_path ${Images_path}/singleCell_MUC6/Diff/GSEA

## Show the expression distribution of MUC6 and GKN1/GKN2
gene=MUC6
${Rscript_singlecell} ${scripts_path}/singlecell/showGene.vln.MUC6.R \
  --single_cell_file ${work_dir}/public_ref/singleCell/njmu/epiall_nor_PCA_50_RE0.5.Rdata \
  --gene ${gene} \
  --out_path ${Images_path}/singleCell_MUC6_vln



## Violin plot of GKN1/GKN2 expression across all MSS samples
## The JZ762 mutation is not included in the analysis
## JZ740, JR004, JR009 and JR039 are MSI (included)
gene=GKN1_GKN2
${Rscript_singlecell} ${scripts_path}/singlecell/showGene.vln.R \
  --single_cell_file ${work_dir}/public_ref/singleCell/njmu/epiall_nor_PCA_50_RE0.5.Rdata \
  --gene ${gene} \
  --out_path ${Images_path}/singleCell_MUC6_vln/GKN

## GKN1/GKN2 are also present in Pit cells without a MUC6 mutation
## Check whether the fraction of cells highly expressing GKN1/GKN2 differs
## between this kind of Pit and the other Pit cells
# Genes are taken from column 8 of DiffGene.Pit.high.tsv, skipping lines that contain "gene".
# NOTE(review): this is a hard-coded absolute path while the rest of the file
# uses ${Images_path} - confirm whether they point to the same directory.
for gene in $(grep -v gene /public/home/xxf2019/20220915_gastric_multiple/dna_combinePublic/images/singleCell_MUC6/Diff/DiffGene.Pit.high.tsv | awk -F'\t' '{print $8}'); do
  ${Rscript_singlecell} ${scripts_path}/singlecell/compare_PitCellRatio.R \
    --sample_list_file ${config_path}/tumor_normal.class.MSS_MSI.list \
    --singleCell_sample_file ${config_path}/singleCell_Sample.list \
    --single_cell_scissor_file ${Images_path}/singleCell_MUC6/Scissor_STAD_MUC6_mutation.IM.CellRate.all.RData \
    --single_cell_file ${work_dir}/public_ref/singleCell/njmu/epiall_nor_PCA_50_RE0.5.Rdata \
    --out_path ${Images_path}/singleCellRatio \
    --gene ${gene}
done

## Do cells with high expression of both GKN1 and GKN2 differ in MUC6-mutation-associated Pit?
## All other cells are merged into one class
${Rscript_singlecell} ${scripts_path}/singlecell/compare_PitCellRatio.GKN1_GKN2.R \
  --sample_list_file ${config_path}/tumor_normal.class.MSS_MSI.list \
  --singleCell_sample_file ${config_path}/singleCell_Sample.list \
  --single_cell_scissor_file ${Images_path}/singleCell_MUC6/Scissor_STAD_MUC6_mutation.IM.CellRate.all.RData \
  --single_cell_file ${work_dir}/public_ref/singleCell/njmu/epiall_nor_PCA_50_RE0.5.Rdata \
  --out_path ${Images_path}/singleCellRatio \
  --gene "GKN1_GKN2"


## Are there other candidates that are significantly highly expressed in MUC6-mutant Pit?
## 1. Not expressed in other, normal IM
## 2. Bulk transcriptome sequencing: significantly higher in MUC6-mutant samples
## 3. A receptor-ligand interaction exists


## Cell-cell interaction in MUC6-mutant samples
## Step 1: convert the format
${Rscript_cpbd} ${scripts_path}/singlecell/cellphonedb.R \
  --single_cell_file ${Images_path}/singleCell_MUC6/Scissor_STAD_MUC6_mutation.IM.CellRate.all.RData \
  --out_path ${Images_path}/singleCell_MUC6/cpdb
## Step 2: run cellphonedb
${python_cpbd} ${scripts_path}/singlecell/cellphonedb.py
## Step 3: visualization
${Rscript_cpbd} ${scripts_path}/singlecell/ktplots.R \
  --pvals_file ${Images_path}/singleCell_MUC6/cpdb/statistical_analysis_pvalues_IM_MUC6.txt \
  --means_file ${Images_path}/singleCell_MUC6/cpdb/statistical_analysis_means_IM_MUC6.txt \
  --out_path ${Images_path}/singleCell_MUC6/cpdb

gene_list=(DSC2 DSG2)
for gene in "${gene_list[@]}"; do
  ## Check whether genes from the observed receptor-ligand interactions are
  ## expressed higher in mutant Pit
  ${Rscript_singlecell} ${scripts_path}/singlecell/compare_PitCellRatio.R \
    --sample_list_file ${config_path}/tumor_normal.class.MSS_MSI.list \
    --singleCell_sample_file ${config_path}/singleCell_Sample.list \
    --single_cell_scissor_file ${Images_path}/singleCell_MUC6/Scissor_STAD_MUC6_mutation.IM.CellRate.all.RData \
    --single_cell_file ${work_dir}/public_ref/singleCell/njmu/epiall_nor_PCA_50_RE0.5.Rdata \
    --out_path ${Images_path}/singleCellRatio \
    --gene ${gene}

  ## Expression distribution of the same genes
  ${Rscript_singlecell} ${scripts_path}/singlecell/showGene.vln.MUC6.R \
    --single_cell_file ${Images_path}/singleCell_MUC6/Scissor_STAD_MUC6_mutation.IM.CellRate.all.RData \
    --gene ${gene} \
    --out_path ${Images_path}/singleCell_MUC6_vln
done

## Check whether the cells belong to Pit
gene_list=(PGC MUC5AC TFF1 TFF2)
for gene in "${gene_list[@]}"; do
  ${Rscript_singlecell} ${scripts_path}/singlecell/showGene.vln.MUC6.R \
    --single_cell_file ${Images_path}/singleCell_MUC6/Scissor_STAD_MUC6_mutation.IM.CellRate.all.RData \
    --gene ${gene} \
    --out_path ${Images_path}/singleCell_MUC6_vln
done

## MUC6 expression of this sample among the 743 samples (wording of the original note)
gene=MUC6
${Rscript} ${scripts_path}/mRNA/showGene.Normalize.MutvsWild.SinlgeSample.R \
  --sample_list_file ${config_path}/tumor_normal.class.list \
  --sample_list_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
  --rsem_file ${mRNA_path}/CombineTMM.DNAUse.NJMU_TCGA.tsv \
  --maf_public_file ${work_dir}/maf_public/All_use.addVAF.maf \
  --out_path ${Images_path}/expression/${gene} \
  --use_mut_sample JZGC00732 \
  --gene ${gene}

## 20230727
## Compare whether the SMG mutation rate differs between patients whose IM
## carries a MUC6 mutation and those whose IM does not
gene=MUC6
type_class="IM"
${Rscript} ${scripts_path}/plot/mutRate_compute.Gene.R \
  --input_file ${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv \
  --type_class ${type_class} \
  --info_file ${work_dir}/baseTable/STAD_Info.addBurden.MSI_MSS.addCNVType.tsv \
  --images_path ${Images_path}/mutRateGene/${gene}_${type_class} \
  --driver_gene_list ${mutsig_check_path}/All_driver.list \
  --gene ${gene}
## Result note (original text appears to contain a typo; presumably: no
## difference in SMG mutation rate was observed - confirm with the author)

####################################################################################
## TP53-specific comparisons
## Difference in TP53 mutation rate between IGC and DGC
${Rscript} ${scripts_path}/driverGene/mutRate_plot.TP53.R \
  --gene TP53 \
  --mut_rate_gene_file ${Images_path}/mutRate/MutRate.tsv \
  --mut_rate_point_file ${Images_path}/mutRate/MutRate.RecurrentPoint.tsv \
  --images_path ${Images_path}/mutRatePlot

## Mutation burden: samples with TP53 mutations seen once vs seen multiple times
${Rscript} ${scripts_path}/driverGene/mutBurden_plot.RecurrentPoint.TP53.R \
  --input_file ${baseTable_path}/STAD_Info.addBurden.tsv \
  --ccf_file ${MutationTime_path}/result/All_CCF_mutTime.tsv \
  --images_path ${Images_path}/mutBurden

## Mutation burden: trunk mutations vs private mutations
${Rscript} ${scripts_path}/driverGene/mutBurden_plot.ShareMut.TP53.R \
  --input_file ${baseTable_path}/STAD_Info.addBurden.tsv \
  --ccf_file ${MutationTime_path}/result/All_CCF_mutTime.tsv \
  --images_path ${Images_path}/mutBurden

####################################################################################
## Merge the key driver gene sets
## Genes identified in this study + previously reported gastric cancer driver genes
# Header line first, then the de-duplicated union of both lists without their headers.
{
  echo "Gene_Symbol"
  cat ${work_dir}/mutsig_check/smg.list ${work_dir}/public_ref/SMG_sort.list \
    | grep -v Gene_Symbol \
    | sort -u
} > ${work_dir}/public_ref/importTantGene.list



####################################################################################
## Tumor heterogeneity
## Exclude samples that have both IGC and DGC
## Compute intratumor heterogeneity for IM_IGC and IM_DGC
${Rscript} ${scripts_path}/tree/ComputeHeterogeneity.OnlyHeterogeneity.R \
  --muti_cancer ${maf_path}/All_GGA.cancer.maf \
  --muti_pre ${maf_path}/All_GGA.precancer.maf \
  --gene_list ${work_dir}/public_ref/importTantGene.list \
  --sample_info ${config_path}/tumor_normal.class.list \
  --out_path ${Images_path}/ITH

## Define two subtypes by shared driver mutations and assess intratumor heterogeneity
${Rscript} ${scripts_path}/tree/ComputeHeterogeneity.TrunkDriver.R \
  --muti_cancer ${maf_path}/All_GGA.cancer.maf \
  --muti_pre ${maf_path}/All_GGA.precancer.maf \
  --gene_list ${work_dir}/public_ref/importTantGene.list \
  --preGene_list ${work_dir}/mutsig_check/im_smg.list \
  --ith_file ${Images_path}/ITH/ITH.compute.uniqueNormal.tsv \
  --sample_info ${config_path}/tumor_normal.class.list \
  --out_path ${Images_path}/ITH


####################################################################################
## Build phylogenetic trees
##################################### Count the number of samples per person
## With 2 (original note says "people"; presumably samples - confirm): build the
## tree with all mutations, ignoring VAF
## With more than 3: VAF must be taken into account to make the tree more reliable
${Rscript} ${scripts_path}/tree/computSampleNum.R \
  --info_file ${config_path}/tumor_normal.class.MSS_MSI.list \
  --out_file ${tree_path}/tumor_normal.class.forTree.list

## Describe the driver-gene VAF of every sample and the mutations of that sample
## Used later when checking the trees
${Rscript} ${scripts_path}/tree/GetDriverEverySample.R \
  --ccf_file ${MutationTime_path}/result/All_CCF_mutTime.tsv \
  --ccf_msi_file ${MutationTime_path}/result/All_CCF_mutTime.MSI.tsv \
  --gene_list ${work_dir}/public_ref/importTantGene.list \
  --sample_info ${config_path}/tumor_normal.class.MSS_MSI.list \
  --out_path ${Images_path}/DriverEverySample \
  --class_order_file ${config_path}/Class_order_sub.list

##################################### Environment setup
## Phylogenetic trees
## The environment lives on "950"
## Copy the data over and work there
# NOTE(review): HOME is reassigned here, which changes what "~" expands to for
# the rest of the session.
export PROOT_NO_SECCOMP=1
HOME=/public/user/xxf2019

# Starts a shell in the treeomics container with /public mounted at /public.
# NOTE(review): per the note below, the following commands are meant to be run
# inside that shell rather than executed by this script non-interactively.
${HOME}/udocker run -v /public:/public treeomics bash

##################################### The following steps are run inside udocker
# Virtual X display :11, started in the background, then selected via DISPLAY.
Xvfb -ac :11 -screen 0 1280x1024x8 &
export  DISPLAY=:11

## Environment setup
# NOTE(review): work_dir points at dna_combine here, while the config sourced at
# the top of the file lives under dna_combinePublic - confirm this is intended.
export HOME=/public/user/xxf2019
export work_dir=~/20220915_gastric_multiple/dna_combine
source ${work_dir}/config/config.sh

##################################### Link the gene set to be displayed

# Keep the shipped gene list as *.bk.txt, replace it with the selected gene
# list, then append three extra genes.
# NOTE(review): re-running this section overwrites the backup with the already
# replaced file.
mv /treeomics/input/SMG_sort.txt /treeomics/input/SMG_sort.bk.txt
cp -rf ${work_dir}/images/selectGCClone/GCClone_gene.all_record.list /treeomics/input/SMG_sort.txt
echo "BMP6" >> /treeomics/input/SMG_sort.txt
echo "MUC6" >> /treeomics/input/SMG_sort.txt
echo "CFTR" >> /treeomics/input/SMG_sort.txt

##################################### Run the phylogenetic trees
HOME=/
## conda install -c bioconda pyensembl
## pyensembl install --release 75 --species homo_sapiens
## Use the configuration that includes all mutations
## When the number of samples is less than 3
## Build the tree including all mutations
# Swap in the revised patient.py before the first batch.
cp -f /root/treeomics/treeomics/patient.py.revise /root/treeomics/treeomics/patient.py

# min_vaf and error are exported because the sh -c children below read them
# from the environment.
export min_vaf=0.0001
export error=0.00000001
# First batch: rows whose column 8 is <= 2; column 2 is passed to the tree
# script, up to 10 runs in parallel. xargs substitutes the literal token
# "Normal" in the command text with each input value.
cat ${tree_path}/tumor_normal.class.forTree.list | grep -v Normal | awk -F'\t' '{if($8<=2)print $2}' | sort -u | xargs -P 10 -I Normal sh -c '
echo Normal
sh ${scripts_path}/tree/Treeomics_Tree.sh Normal ${config_path} ${min_vaf} ${error}
'

## With more than 2 samples, including all mutations makes the tree inaccurate
# Restore the raw patient.py before the second batch.
cp -f /root/treeomics/treeomics/patient.py.raw /root/treeomics/treeomics/patient.py 
export min_vaf=0.0001
export error=0.00000001
# Second batch: rows whose column 8 is > 2.
cat ${tree_path}/tumor_normal.class.forTree.list | grep -v Normal | awk -F'\t' '{if($8>2)print $2}' | sort -u  | xargs -P 10 -I Normal sh -c '
echo Normal
sh ${scripts_path}/tree/Treeomics_Tree.sh Normal ${config_path} ${min_vaf} ${error}
'

## Disabled block, kept for reference only. The here-document is fed to the
## no-op ':' and its delimiter is quoted so the shell expands nothing inside it.
: <<'EOF'
## 五个IGC和DGC都有的样本，重新跑驱动
cp -f /root/treeomics/treeomics/patient.py.raw /root/treeomics/treeomics/patient.py 
export min_vaf=0.0001
export error=0.00000001
cat ${tree_path}/tumor_normal.class.forTree.list | grep -w "IM + IGC + DGC" | grep -v Normal | awk -F'\t' '{if($8>2)print $2}' | sort -u  | xargs -P 20 -I Normal sh -c '
echo Normal
sh ${scripts_path}/tree/Treeomics_Tree.sh Normal ${config_path} ${min_vaf} ${error}
'
EOF

##################################### Copy files
# ${tree_path:?} aborts instead of expanding to "/Tree_file" when tree_path is
# unset or empty.
rm -rf -- "${tree_path:?}/Tree_file"
mkdir -p "${tree_path}/Tree_file"

# Column 2 of the tree list is handed to the move script, up to 10 in parallel;
# xargs substitutes the literal token "Normal" in the command text.
cat ${tree_path}/tumor_normal.class.forTree.list | grep -v Normal | awk -F'\t' '{print $2}' | \
sort -u | xargs -P 10 -I Normal sh -c '
echo Normal
sh ${scripts_path}/tree/Treeomics_moveTree.sh Normal ${config_path}
'

## Special case for JZ563B: use its *_mlhtree_2.pdf
Normal=JZ563B
cp ${tree_path}/${Normal}/*_mlhtree_2.pdf ${tree_path}/Tree_file/${Normal}_mlhtree.pdf


# Print the list entries for which no tree PDF was copied.
tree_sample=$(ls ${tree_path}/Tree_file/ | grep pdf | awk -F'_mlh' '{print $1}' | tr '\n' '|' | sed 's/|$//')
if [ -n "${tree_sample}" ]; then
  grep -w -v -E "${tree_sample}" ${tree_path}/tumor_normal.class.forTree.list
else
  # No PDF at all: every entry is missing. Without this branch grep would be
  # called with no pattern and fail with a usage error.
  cat ${tree_path}/tumor_normal.class.forTree.list
fi

##################################### Classification
## Disabled block, kept for reference only. With an unquoted delimiter the
## backtick pipelines inside the here-document would still be executed by the
## shell; quoting the delimiter (and feeding it to ':') makes the text inert.
: <<'EOF'
mkdir -p ${tree_path}/TreeClass/NoTrunkDriver
mkdir -p ${tree_path}/TreeClass/TrunkDriver

for id in `cat ${Images_path}/ITH/DriverClass.tsv | grep -v Normal | awk '{print $1}'`
do
echo $id
normal=`cat ${config_path}/tumor_normal.class.list | grep ${id} | awk '{print $2}' | sort -u`
tree_class=`cat ${Images_path}/ITH/DriverClass.tsv | grep -w ${id} | awk -F'\t' '{print $3}'`

ln -snf ${tree_path}/Tree_file/${normal}_mlhtree.pdf ${tree_path}/TreeClass/${tree_class}/${id}_mlhtree.pdf
ln -snf ${tree_path}/Tree_file/${normal}_variants.csv ${tree_path}/TreeClass/${tree_class}/${id}_variants.csv

## 驱动基因的CCF变化
ln -snf ${Images_path}/DriverEverySample/${id}_Driver.pdf ${tree_path}/TreeClass/${tree_class}/${id}_Driver.pdf
done
EOF

##################################### After the trees have been checked manually, relink
# For every id in DriverClass.tsv (lines containing "Normal" skipped), link its
# driver-gene CCF plot into the directory of its tree class (column 3).
# The per-id lookup of the matched normal was dropped: its result was never used.
for id in $(cat ${Images_path}/ITH/DriverClass.tsv | grep -v Normal | awk '{print $1}'); do
  echo "${id}"
  tree_class=$(cat ${Images_path}/ITH/DriverClass.tsv | grep -w "${id}" | awk -F'\t' '{print $3}')

  ## CCF change of the driver genes
  ln -snf ${Images_path}/DriverEverySample/${id}_Driver.pdf ${tree_path}/TreeClass_Revise/${tree_class}/${id}_Driver.pdf
done

####################################################################################
## Judge clonal selection with the pyclone-based method
## Not adopted in the end
sh ${scripts_path}/module/pyclone_selectclone.sh

####################################################################################
## 20230424
## Look at the selection features of all identified genes
####################################################################################
## Driver genes shared by both IGC and DGC
# A gene counts as shared when it appears more than once across the two lists.
echo Gene_Symbol > ${mutsig_check_path}/share_smg.list
cat ${mutsig_check_path}/igc_smg.list ${mutsig_check_path}/dgc_smg.list | sort  | uniq -c | awk -F' ' '{if($1 > 1) print $2}'  | grep -v Gene_Symbol \
>> ${mutsig_check_path}/share_smg.list

# Build an alternation of the reported genes. Blank lines are dropped so they
# cannot create an empty alternative, which would match every line.
report_gene=$(grep -v '^$' ${mutsig_check_path}/report.list | tr '\n' '|' | sed 's/|$//')
# -w keeps a reported gene from matching longer symbols that merely contain it
# (e.g. APC vs APC2); the header line is always kept.
report_pattern="${report_gene:+${report_gene}|}Gene_Symbol"
grep -w -E "${report_pattern}" ${mutsig_check_path}/igc_smg.list > ${mutsig_check_path}/igc_smg.report.list
grep -w -E "${report_pattern}" ${mutsig_check_path}/dgc_smg.list > ${mutsig_check_path}/dgc_smg.report.list
grep -w -E "${report_pattern}" ${mutsig_check_path}/share_smg.list > ${mutsig_check_path}/share_smg.report.list

mkdir -p ${Images_path}/DriverReport

## All reported driver genes
class_type_list=("IGC" "DGC" "IGC_DGC")
for class_type in "${class_type_list[@]}"; do
  ${Rscript} ${scripts_path}/plot/waterfull_smg.SortBySample.R \
    --type ${class_type} \
    --ccf_file ${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv \
    --images_path ${Images_path}/DriverReport \
    --info_file ${config_path}/tumor_normal.class.list \
    --class_order_file ${config_path}/Class_order.list \
    --class_order_sub_file ${config_path}/Class_order_sub.list \
    --igc_gene_list ${mutsig_check_path}/igc_smg.list \
    --dgc_gene_list ${mutsig_check_path}/dgc_smg.list \
    --all_gene_list ${mutsig_check_path}/im_smg.list \
    --tp53_pre_file ${Images_path}/lollipop/TP53_NMU/TP53.PreCancerous.UniqueNormal.tsv \
    --tp53_cancer_file ${Images_path}/lollipop/TP53_NMU/TP53.Cancerous.UniqueNormal.tsv \
    --apc_pre_file ${Images_path}/lollipop/APC_NMU/APC.PreCancerous.UniqueNormal.tsv \
    --apc_cancer_file ${Images_path}/lollipop/APC_NMU/APC.Cancerous.UniqueNormal.tsv
done

## Describe the driver-gene VAF of every sample and the mutations of that sample
## New sample list
${Rscript} ${scripts_path}/tree/GetDriverEverySample.R \
  --ccf_file ${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv \
  --gene_list ${mutsig_check_path}/smg.list \
  --sample_info ${config_path}/tumor_normal.class.MSS_MSI.list \
  --out_path ${Images_path}/DriverReport/DriverEverySample \
  --class_order_file ${config_path}/Class_order_sub.list

## For each gene: all trees it appears in, the CCF changes of all genes, and
## the clonal evolution tree of the samples involved
Variant_Types="Missense_Mutation|Nonsense_Mutation|Frame_Shift_Ins|Frame_Shift_Del|In_Frame_Ins|In_Frame_Del|Splice_Site|Nonstop_Mutation"

for gene in $(cat ${mutsig_check_path}/igc_smg.list ${mutsig_check_path}/dgc_smg.list | grep -v Gene_Symbol); do
  ## Whether it is IGC or DGC has to be judged manually
  mkdir -p ${Images_path}/DriverReport/${gene}
  ## Samples carrying a mutation of one of the listed variant types in this gene
  for sample in $(grep -w ${gene} ${MutationTime_path}/result/All_CCF_mutTime.tsv | grep -E -w ${Variant_Types} | awk -F'\t' '{print $1}' | sort -u); do
    # Columns 2 and 1 of the class list give the values used for the link names below.
    Normal=$(grep -w ${sample} ${config_path}/tumor_normal.class.list | awk -F'\t' '{print $2}' | sort -u)
    id=$(grep -w ${sample} ${config_path}/tumor_normal.class.list | awk -F'\t' '{print $1}' | sort -u)
    ## Link the phylogenetic tree
    ln -snf ${tree_path}/Tree_file/${Normal}_mlhtree.pdf ${Images_path}/DriverReport/${gene}/
    ln -snf ${tree_path}/Tree_file/${Normal}_variants.csv ${Images_path}/DriverReport/${gene}/
    ## Link the driver-gene CCF plot
    ln -snf ${Images_path}/DriverReport/DriverEverySample/${id}_Driver.pdf ${Images_path}/DriverReport/${gene}/${Normal}_Driver.pdf
  done
done

####################################################################################
## Examine gastric cancer driver genes
## Key mutations promoting gastric carcinogenesis are identified by the rules below
## Present in IM, always retained in GC, and clonal in more than 80% of GC for that gene
## Absent in IM, and always clonal in GC for that gene:
## Mutations present in IM but not retained in GC; IGC/DGC-specific driver mutations
## Present independently only in the IM of IGC or of DGC, and always clonal in the other GC subtype
## Every gene must also satisfy the following:
# at least 2 samples whose mutation does not co-occur with the classic driver
# genes TP53, APC, PIK3CA, SMAD4, RHOA, CDH1, ERBB2, FBXW7
# NOTE(review): the note above mentions 80% while --clone_t is 0.6 - confirm
# whether these refer to the same threshold.
${Rscript} ${scripts_path}/tree/Selectclone_annotation.R \
  --smg_gene_file ${mutsig_check_path}/report_chooseclone.list \
  --output_path ${Images_path}/selectGCClone \
  --clone_t 0.6 \
  --ccf_file ${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv \
  --class_order ${config_path}/Class_order.list \
  --class_a_order ${config_path}/Class_order_sub.list \
  --sample_file ${config_path}/tumor_normal.class.list

## Genes potentially under evolutionary selection
# Header first, then column 1 of every row that is neither "NoChoose" nor the header.
{
  echo "Gene_Symbol"
  grep -v NoChoose ${Images_path}/selectGCClone/GCClone_gene.reord.tsv \
    | grep -v Hugo_Symbol \
    | awk -F'\t' '{print $1}'
} > ${Images_path}/selectGCClone/choose_gene.list
mkdir -p ${Images_path}/DriverReport/Choose

## Inspect the evolution of the identified genes
class_type_list=("IGC" "DGC" "IGC_DGC")
for class_type in "${class_type_list[@]}"; do
  # The same candidate list is passed as the IGC, DGC and overall gene list.
  ${Rscript} ${scripts_path}/plot/waterfull_smg.SortBySample.R \
    --type ${class_type} \
    --ccf_file ${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv \
    --images_path ${Images_path}/DriverReport/Choose \
    --info_file ${config_path}/tumor_normal.class.list \
    --class_order_file ${config_path}/Class_order.list \
    --class_order_sub_file ${config_path}/Class_order_sub.list \
    --igc_gene_list ${Images_path}/selectGCClone/choose_gene.list \
    --dgc_gene_list ${Images_path}/selectGCClone/choose_gene.list \
    --all_gene_list ${Images_path}/selectGCClone/choose_gene.list \
    --tp53_pre_file ${Images_path}/lollipop/TP53_NMU/TP53.PreCancerous.UniqueNormal.tsv \
    --tp53_cancer_file ${Images_path}/lollipop/TP53_NMU/TP53.Cancerous.UniqueNormal.tsv \
    --apc_pre_file ${Images_path}/lollipop/APC_NMU/APC.PreCancerous.UniqueNormal.tsv \
    --apc_cancer_file ${Images_path}/lollipop/APC_NMU/APC.Cancerous.UniqueNormal.tsv
done

## After manual review
## the reliable driver genes are kept in the following files
## ${Images_path}/selectGCClone/GCClone_gene.IGC_manual.list
## ${Images_path}/selectGCClone/GCClone_gene.DGC_manual.list
## ${Images_path}/selectGCClone/GCClone_gene.IGC_DGC_manual.list
mkdir -p ${Images_path}/selectGCClone/ChooseManual

class_type_list=("IGC" "DGC" "IGC_DGC")
for class_type in "${class_type_list[@]}"; do
  ${Rscript} ${scripts_path}/plot/waterfull_smg.SortBySample.R \
    --type ${class_type} \
    --ccf_file ${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv \
    --images_path ${Images_path}/selectGCClone/ChooseManual \
    --info_file ${config_path}/tumor_normal.class.list \
    --class_order_file ${config_path}/Class_order.list \
    --class_order_sub_file ${config_path}/Class_order_sub.list \
    --igc_gene_list ${Images_path}/selectGCClone/GCClone_gene.IGC_manual.list \
    --dgc_gene_list ${Images_path}/selectGCClone/GCClone_gene.DGC_manual.list \
    --all_gene_list ${Images_path}/selectGCClone/GCClone_gene.IGC_DGC_manual.list \
    --tp53_pre_file ${Images_path}/lollipop/TP53_NMU/TP53.PreCancerous.UniqueNormal.tsv \
    --tp53_cancer_file ${Images_path}/lollipop/TP53_NMU/TP53.Cancerous.UniqueNormal.tsv \
    --apc_pre_file ${Images_path}/lollipop/APC_NMU/APC.PreCancerous.UniqueNormal.tsv \
    --apc_cancer_file ${Images_path}/lollipop/APC_NMU/APC.Cancerous.UniqueNormal.tsv
done

## Describe the driver-gene VAF of every sample and the mutations of that sample
## New sample list
${Rscript} ${scripts_path}/tree/GetDriverEverySample.R \
  --ccf_file ${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv \
  --gene_list ${Images_path}/selectGCClone/GCClone_gene.all_record.list \
  --sample_info ${config_path}/tumor_normal.class.MSS_MSI.list \
  --out_path ${Images_path}/DriverEverySample \
  --class_order_file ${config_path}/Class_order_sub.list

## For each gene: all trees it appears in, the CCF changes of all genes, and
## the clonal evolution tree of the samples involved
Variant_Types="Missense_Mutation|Nonsense_Mutation|Frame_Shift_Ins|Frame_Shift_Del|In_Frame_Ins|In_Frame_Del|Splice_Site|Nonstop_Mutation"
for gene in $(grep -v Gene_Symbol ${Images_path}/selectGCClone/GCClone_gene.all_record.list); do
  ## Whether it is IGC or DGC has to be judged manually
  mkdir -p ${Images_path}/DriverChoose/${gene}
  ## Samples carrying a mutation of one of the listed variant types in this gene
  for sample in $(grep -w ${gene} ${MutationTime_path}/result/All_CCF_mutTime.tsv | grep -E -w ${Variant_Types} | awk -F'\t' '{print $1}' | sort -u); do
    # Columns 2 and 1 of the class list give the values used for the link names below.
    Normal=$(grep -w ${sample} ${config_path}/tumor_normal.class.list | awk -F'\t' '{print $2}' | sort -u)
    id=$(grep -w ${sample} ${config_path}/tumor_normal.class.list | awk -F'\t' '{print $1}' | sort -u)
    ## Link the phylogenetic tree
    ln -snf ${tree_path}/Tree_file/${Normal}_mlhtree.pdf ${Images_path}/DriverChoose/${gene}/
    ln -snf ${tree_path}/Tree_file/${Normal}_variants.csv ${Images_path}/DriverChoose/${gene}/
    ## Link the driver-gene CCF plot
    ln -snf ${Images_path}/DriverEverySample/${id}_Driver.pdf ${Images_path}/DriverChoose/${gene}/${Normal}_Driver.pdf
  done
done

## LOH status of the key driver genes
class_type_list=("All" "IGC" "DGC")
for class_type in "${class_type_list[@]}"; do
  ## Assess LOH of mutations shared by IM and GC, and of early clonal mutations
  ## in GC, compared with the other mutations
  ${Rscript} ${scripts_path}/plot/JudgeGeneDriverLOH.R \
    --ccf_file ${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv \
    --gene_list ${Images_path}/selectGCClone/GCClone_gene.all_record.list \
    --sample_info ${config_path}/tumor_normal.class.list \
    --type ${class_type} \
    --out_path ${Images_path}/DriverChoose/GeneLOH
done

## Tumor heterogeneity
## Exclude samples that have both IGC and DGC
## Compute intratumor heterogeneity for IM_IGC and IM_DGC
# NOTE(review): unlike the earlier ITH run in this file, no --gene_list is
# passed here - confirm this is intended.
${Rscript} ${scripts_path}/tree/ComputeHeterogeneity.OnlyHeterogeneity.R \
  --muti_cancer ${maf_path}/All_GGA.cancer.maf \
  --muti_pre ${maf_path}/All_GGA.precancer.maf \
  --sample_info ${config_path}/tumor_normal.class.list \
  --out_path ${Images_path}/ITH_CloneChoose

## Define two subtypes by shared driver mutations and assess intratumor heterogeneity
${Rscript} ${scripts_path}/tree/ComputeHeterogeneity.TrunkDriver.R \
  --muti_cancer ${maf_path}/All_GGA.cancer.maf \
  --muti_pre ${maf_path}/All_GGA.precancer.maf \
  --gene_list ${Images_path}/selectGCClone/GCClone_gene.all_record.list \
  --ith_file ${Images_path}/ITH_CloneChoose/ITH.compute.uniqueNormal.tsv \
  --ith_sample_file ${Images_path}/ITH_CloneChoose/ITH.compute.allSample.tsv \
  --sample_info ${config_path}/tumor_normal.class.list \
  --out_path ${Images_path}/ITH_CloneChoose


####################################################################################
## Estimate progression time
## Prepare the input files
## 1. Number of passenger (silent) mutations private to the gastric cancer
${Rscript} ${scripts_path}/evolutionTime/prepareInput.R \
  --muti_cancer ${maf_path}/All_GGA.cancer.maf \
  --muti_pre ${maf_path}/All_GGA.precancer.maf \
  --sample_info ${config_path}/tumor_normal.class.list \
  --out_path ${Images_path}/evolutionTime

## Infer the time assuming a mutation rate of 1-10 per year
${Rscript_evolutionTime} ${scripts_path}/evolutionTime/timing_summary.R \
  --input_file ${Images_path}/evolutionTime/sample_mutNum.tsv \
  --code_path ${scripts_path}/evolutionTime/ipmn-timing-master \
  --plot_id_file ${config_path}/plotID.list \
  --out_path ${Images_path}/evolutionTime

## Time comparisons
## 1. Time for IM to progress to IGC vs time for IM to progress to DGC
## 2. Whether trunk mutations in TP53, APC, CDH1 or PIK3CA shorten the time
##    for IM to progress to gastric cancer
## 3. Specific gene combinations
${Rscript} ${scripts_path}/evolutionTime/compareTime.R \
  --ccf_file ${MutationTime_path}/result/All_CCF_mutTime.addShare.tsv \
  --gene_list ${Images_path}/selectGCClone/GCClone_gene.all_record.list \
  --sample_info ${config_path}/tumor_normal.class.list \
  --input_file ${Images_path}/evolutionTime/timing_molecular_clock.tsv \
  --out_path ${Images_path}/evolutionTime